--- title: "Global terrorism" date: 2019-05-25T10-24-00 output: md_document: default ---

Intro

Library

library(tidyverse)
library(skimr)
library(maps)
library(gganimate)
library(plotly)
library(streamgraph)

Load data

# Explicit parsers for selected columns of the raw GTD export; every column
# not listed here is left to readr's type guessing.
gtd_col_types <- cols(
  # integer-coded columns
  attacktype3 = col_integer(),
  claimmode2 = col_integer(),
  claimmode3 = col_integer(),
  weaptype4 = col_integer(),
  weapsubtype4 = col_integer(),
  compclaim = col_integer(),
  # text columns
  ransomnote = col_character(),
  attacktype3_txt = col_character(),
  claimmode2_txt = col_character(),
  claimmode3_txt = col_character(),
  weaptype4_txt = col_character(),
  weapsubtype4_txt = col_character(),
  gname3 = col_character(),
  gsubname3 = col_character(),
  gsubname2 = col_character(),
  # ransom amounts, parsed with the lenient number parser
  ransomamtus = col_number(),
  ransompaidus = col_number()
)

data <- read_csv(
  "data/globalterrorismdb_0718dist.csv",
  col_types = gtd_col_types
)

# For world map

# Country outlines used as the base layer for map plots.
world <- map_data("world")

# Theme that removes the panel background and all axis decoration so only
# the borders remain visible.
blank_map_theme <- theme(
  panel.background = element_blank(),
  axis.title = element_blank(),
  axis.line.x = element_blank(),
  axis.ticks = element_blank(),
  axis.text = element_blank()
)

worldmap <- ggplot(world, aes(x = long, y = lat)) +
  borders("world") +
  blank_map_theme +
  coord_fixed(ratio = 1.2)

Data dimensions

# Number of rows and columns in the loaded data
dim(data)
## [1] 181691    135

First glance

# List every column name in the loaded data
colnames(data)
##   [1] "eventid"            "iyear"              "imonth"            
##   [4] "iday"               "approxdate"         "extended"          
##   [7] "resolution"         "country"            "country_txt"       
##  [10] "region"             "region_txt"         "provstate"         
##  [13] "city"               "latitude"           "longitude"         
##  [16] "specificity"        "vicinity"           "location"          
##  [19] "summary"            "crit1"              "crit2"             
##  [22] "crit3"              "doubtterr"          "alternative"       
##  [25] "alternative_txt"    "multiple"           "success"           
##  [28] "suicide"            "attacktype1"        "attacktype1_txt"   
##  [31] "attacktype2"        "attacktype2_txt"    "attacktype3"       
##  [34] "attacktype3_txt"    "targtype1"          "targtype1_txt"     
##  [37] "targsubtype1"       "targsubtype1_txt"   "corp1"             
##  [40] "target1"            "natlty1"            "natlty1_txt"       
##  [43] "targtype2"          "targtype2_txt"      "targsubtype2"      
##  [46] "targsubtype2_txt"   "corp2"              "target2"           
##  [49] "natlty2"            "natlty2_txt"        "targtype3"         
##  [52] "targtype3_txt"      "targsubtype3"       "targsubtype3_txt"  
##  [55] "corp3"              "target3"            "natlty3"           
##  [58] "natlty3_txt"        "gname"              "gsubname"          
##  [61] "gname2"             "gsubname2"          "gname3"            
##  [64] "gsubname3"          "motive"             "guncertain1"       
##  [67] "guncertain2"        "guncertain3"        "individual"        
##  [70] "nperps"             "nperpcap"           "claimed"           
##  [73] "claimmode"          "claimmode_txt"      "claim2"            
##  [76] "claimmode2"         "claimmode2_txt"     "claim3"            
##  [79] "claimmode3"         "claimmode3_txt"     "compclaim"         
##  [82] "weaptype1"          "weaptype1_txt"      "weapsubtype1"      
##  [85] "weapsubtype1_txt"   "weaptype2"          "weaptype2_txt"     
##  [88] "weapsubtype2"       "weapsubtype2_txt"   "weaptype3"         
##  [91] "weaptype3_txt"      "weapsubtype3"       "weapsubtype3_txt"  
##  [94] "weaptype4"          "weaptype4_txt"      "weapsubtype4"      
##  [97] "weapsubtype4_txt"   "weapdetail"         "nkill"             
## [100] "nkillus"            "nkillter"           "nwound"            
## [103] "nwoundus"           "nwoundte"           "property"          
## [106] "propextent"         "propextent_txt"     "propvalue"         
## [109] "propcomment"        "ishostkid"          "nhostkid"          
## [112] "nhostkidus"         "nhours"             "ndays"             
## [115] "divert"             "kidhijcountry"      "ransom"            
## [118] "ransomamt"          "ransomamtus"        "ransompaid"        
## [121] "ransompaidus"       "ransomnote"         "hostkidoutcome"    
## [124] "hostkidoutcome_txt" "nreleased"          "addnotes"          
## [127] "scite1"             "scite2"             "scite3"            
## [130] "dbsource"           "INT_LOG"            "INT_IDEO"          
## [133] "INT_MISC"           "INT_ANY"            "related"

Deal with latitude and longitude

# Summary statistics (missing counts, mean, quantiles, histogram) for the two
# coordinate columns
data %>% select(longitude, latitude) %>%  skim()
## Skim summary statistics
##  n obs: 181691 
##  n variables: 2 
## 
## -- Variable type:numeric --------------------------------------------------------
##   variable missing complete      n   mean       sd        p0   p25   p50
##   latitude    4556   177135 181691   23.5    18.57 -53.15    11.51 31.47
##  longitude    4557   177134 181691 -458.7 2e+05     -8.6e+07  4.55 43.25
##    p75   p100     hist
##  34.69  74.63 <U+2581><U+2581><U+2581><U+2583><U+2583><U+2587><U+2582><U+2581>
##  68.71 179.37 <U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2581><U+2587>
# Incidents that have neither a latitude nor a specificity code, shown with
# their textual location fields.
data %>%
  filter(is.na(latitude), is.na(specificity)) %>%
  select(location, city, provstate, country_txt, specificity)
## # A tibble: 6 x 5
##   location                        city    provstate country_txt specificity
##   <chr>                           <chr>   <chr>     <chr>             <dbl>
## 1 <NA>                            Dhupgu~ West Ben~ India                NA
## 2 Wazir Akhbar Khan neighborhood  Kabul   Kabul     Afghanistan          NA
## 3 The kidnapping took place in M~ Mogadi~ Banaadir  Somalia              NA
## 4 The attack took place in the L~ Muang   Yala      Thailand             NA
## 5 Near Shahrah-e-Faisal           Karachi Sindh     Pakistan             NA
## 6 Under a bridge near Tirumangal~ Alampa~ Tamil Na~ India                NA
# For every column, count the cells whose text contains "Unknown"
# (cells that are NA are ignored).
count_unknown <- function(column) {
  sum(str_detect(column, "Unknown"), na.rm = TRUE)
}

data %>%
  summarise_all(count_unknown)
## # A tibble: 1 x 135
##   eventid iyear imonth  iday approxdate extended resolution country
##     <int> <int>  <int> <int>      <int>    <int>      <int>   <int>
## 1       0     0      0     0          0        0          0       0
## # ... with 127 more variables: country_txt <int>, region <int>,
## #   region_txt <int>, provstate <int>, city <int>, latitude <int>,
## #   longitude <int>, specificity <int>, vicinity <int>, location <int>,
## #   summary <int>, crit1 <int>, crit2 <int>, crit3 <int>, doubtterr <int>,
## #   alternative <int>, alternative_txt <int>, multiple <int>,
## #   success <int>, suicide <int>, attacktype1 <int>,
## #   attacktype1_txt <int>, attacktype2 <int>, attacktype2_txt <int>,
## #   attacktype3 <int>, attacktype3_txt <int>, targtype1 <int>,
## #   targtype1_txt <int>, targsubtype1 <int>, targsubtype1_txt <int>,
## #   corp1 <int>, target1 <int>, natlty1 <int>, natlty1_txt <int>,
## #   targtype2 <int>, targtype2_txt <int>, targsubtype2 <int>,
## #   targsubtype2_txt <int>, corp2 <int>, target2 <int>, natlty2 <int>,
## #   natlty2_txt <int>, targtype3 <int>, targtype3_txt <int>,
## #   targsubtype3 <int>, targsubtype3_txt <int>, corp3 <int>,
## #   target3 <int>, natlty3 <int>, natlty3_txt <int>, gname <int>,
## #   gsubname <int>, gname2 <int>, gsubname2 <int>, gname3 <int>,
## #   gsubname3 <int>, motive <int>, guncertain1 <int>, guncertain2 <int>,
## #   guncertain3 <int>, individual <int>, nperps <int>, nperpcap <int>,
## #   claimed <int>, claimmode <int>, claimmode_txt <int>, claim2 <int>,
## #   claimmode2 <int>, claimmode2_txt <int>, claim3 <int>,
## #   claimmode3 <int>, claimmode3_txt <int>, compclaim <int>,
## #   weaptype1 <int>, weaptype1_txt <int>, weapsubtype1 <int>,
## #   weapsubtype1_txt <int>, weaptype2 <int>, weaptype2_txt <int>,
## #   weapsubtype2 <int>, weapsubtype2_txt <int>, weaptype3 <int>,
## #   weaptype3_txt <int>, weapsubtype3 <int>, weapsubtype3_txt <int>,
## #   weaptype4 <int>, weaptype4_txt <int>, weapsubtype4 <int>,
## #   weapsubtype4_txt <int>, weapdetail <int>, nkill <int>, nkillus <int>,
## #   nkillter <int>, nwound <int>, nwoundus <int>, nwoundte <int>,
## #   property <int>, propextent <int>, propextent_txt <int>,
## #   propvalue <int>, ...

The minimum longitude value, -8.6e+07, is clearly incorrect. Comparing the incident's location with its actual coordinates on Google shows that a decimal separator is missing. The code below fixes this by changing the value to -86.185896.

# Show the incident holding the smallest longitude in the data. The minimum
# is computed on the column inside filter(), ignoring missing values;
# TRUE is spelled out because T is an ordinary, reassignable variable.
data %>%
  filter(longitude == min(longitude, na.rm = TRUE))
## # A tibble: 1 x 135
##   eventid iyear imonth  iday approxdate extended resolution country
##     <dbl> <dbl>  <dbl> <dbl> <chr>         <dbl> <chr>        <dbl>
## 1 1.98e11  1982     12    24 <NA>              0 <NA>           145
## # ... with 127 more variables: country_txt <chr>, region <dbl>,
## #   region_txt <chr>, provstate <chr>, city <chr>, latitude <dbl>,
## #   longitude <dbl>, specificity <dbl>, vicinity <dbl>, location <chr>,
## #   summary <chr>, crit1 <dbl>, crit2 <dbl>, crit3 <dbl>, doubtterr <dbl>,
## #   alternative <dbl>, alternative_txt <chr>, multiple <dbl>,
## #   success <dbl>, suicide <dbl>, attacktype1 <dbl>,
## #   attacktype1_txt <chr>, attacktype2 <dbl>, attacktype2_txt <chr>,
## #   attacktype3 <int>, attacktype3_txt <chr>, targtype1 <dbl>,
## #   targtype1_txt <chr>, targsubtype1 <dbl>, targsubtype1_txt <chr>,
## #   corp1 <chr>, target1 <chr>, natlty1 <dbl>, natlty1_txt <chr>,
## #   targtype2 <dbl>, targtype2_txt <chr>, targsubtype2 <dbl>,
## #   targsubtype2_txt <chr>, corp2 <chr>, target2 <chr>, natlty2 <dbl>,
## #   natlty2_txt <chr>, targtype3 <dbl>, targtype3_txt <chr>,
## #   targsubtype3 <dbl>, targsubtype3_txt <chr>, corp3 <chr>,
## #   target3 <chr>, natlty3 <dbl>, natlty3_txt <chr>, gname <chr>,
## #   gsubname <chr>, gname2 <chr>, gsubname2 <chr>, gname3 <chr>,
## #   gsubname3 <chr>, motive <chr>, guncertain1 <dbl>, guncertain2 <dbl>,
## #   guncertain3 <lgl>, individual <dbl>, nperps <dbl>, nperpcap <dbl>,
## #   claimed <dbl>, claimmode <dbl>, claimmode_txt <chr>, claim2 <dbl>,
## #   claimmode2 <int>, claimmode2_txt <chr>, claim3 <lgl>,
## #   claimmode3 <int>, claimmode3_txt <chr>, compclaim <int>,
## #   weaptype1 <dbl>, weaptype1_txt <chr>, weapsubtype1 <dbl>,
## #   weapsubtype1_txt <chr>, weaptype2 <dbl>, weaptype2_txt <chr>,
## #   weapsubtype2 <dbl>, weapsubtype2_txt <chr>, weaptype3 <dbl>,
## #   weaptype3_txt <chr>, weapsubtype3 <dbl>, weapsubtype3_txt <chr>,
## #   weaptype4 <int>, weaptype4_txt <chr>, weapsubtype4 <int>,
## #   weapsubtype4_txt <chr>, weapdetail <chr>, nkill <dbl>, nkillus <dbl>,
## #   nkillter <dbl>, nwound <dbl>, nwoundus <dbl>, nwoundte <dbl>,
## #   property <dbl>, propextent <dbl>, propextent_txt <chr>,
## #   propvalue <dbl>, ...
# Overwrite the longitude of event 198212240004 with the corrected value
# (decimal separator restored).
data[data$eventid == 198212240004, "longitude"] <- -86.185896

Deal with missing longitude and latitude by using the country location as the incident location. The country location is obtained by using Mapquest API.

# Fill in missing coordinates with a country-level location from the MapQuest
# geocoding API: one request per distinct country that has at least one
# incident without a longitude.
# NOTE(review): KEY (the API key) is not defined in this section - presumably
# it is set elsewhere; confirm before running. The key is sent over plain
# http with this base URL.
baseurl <- "http://www.mapquestapi.com/geocoding/v1/address?"

# Country names that are replaced before querying the geocoder, keyed by
# their "+"-joined form.
location_overrides <- c(
  "North+Yemen" = "Yemen",
  "South+Yemen" = "Yemen",
  "West+Germany+(FRG)" = "Germany",
  "Yugoslavia" = "Croatia",
  "Zaire" = "Democratic+Republic+of+the+Congo",
  "Soviet+Union" = "Russia",
  "West+Bank+and+Gaza+Strip" = "Gaza+Strip",
  "Rhodesia" = "Zimbabwe",
  "Czechoslovakia" = "Czech+Republic"
)

country <- data %>%
  filter(is.na(longitude)) %>%
  distinct(country_txt) %>%
  mutate(location = str_replace_all(country_txt, " ", "+")) %>%
  # Names without an override look up as NA and keep their own value.
  mutate(location = coalesce(unname(location_overrides[location]), location))

url <- str_c(
  baseurl, "key=", KEY, "&location=", country$location, "&outFormat=csv"
)

# Tag every geocoding result with the country it was requested for. Binding
# the results to `country` by position breaks as soon as one request returns
# zero or several "COUNTRY"-quality rows, so at most one row per country is
# kept and the match is made by key instead.
missing <- map2_dfr(country$country_txt, url, function(name, request) {
  read_csv(request) %>%
    select(Lat, Lng, GeocodeQuality, GeocodeQualityCode, Country) %>%
    filter(GeocodeQuality == "COUNTRY") %>%
    slice(1) %>%
    mutate(country_txt = name)
})

# Countries without a country-level match end up with NA coordinates.
country <- left_join(country, missing, by = "country_txt")

# Country-level coordinates for every incident that lacks a longitude.
temp <- data %>%
  filter(is.na(longitude)) %>%
  select(eventid, country_txt) %>%
  left_join(country, by = "country_txt") %>%
  select(eventid, Lng, Lat)

# Keep recorded coordinates and fall back to the country-level ones only
# where the recorded value is missing.
# NOTE(review): an incident with a latitude but no longitude keeps its own
# latitude and receives the country longitude - confirm this is intended.
data <- data %>%
  left_join(temp, by = "eventid") %>%
  mutate(
    longitude = coalesce(longitude, Lng),
    latitude = coalesce(latitude, Lat)
  )

Unknown days and months are recorded as 0 in the data; change them to 1. Then create a new column called idate that combines the year, month and day into a single date.

# A day or month of 0 is replaced by 1 so that a date can be built.
data <- data %>%
  mutate(
    iday = replace(iday, iday == 0, 1),
    imonth = replace(imonth, imonth == 0, 1)
  )

# Paste year, month and day into "y-m-d" strings and parse them as dates.
data$idate <- data %>%
  unite(date, iyear, imonth, iday, sep = "-") %>%
  pull(date) %>%
  lubridate::ymd()

Group names contain non-UTF-8 characters. The following code fixes this by removing every character outside the printable ASCII range (space to ~).

# Remove every character outside the range space..tilde from a character
# vector.
strip_non_printable_ascii <- function(text) {
  gsub("[^ -~]", "", text)
}

# Apply the clean-up to the three group-name columns.
data <- mutate_at(
  data,
  vars(gname, gname2, gname3),
  strip_non_printable_ascii
)

Create a label for the leaflet map by adding a new column called popmap

# Turn an optional secondary/tertiary value into text that can be appended
# directly after the primary value: ", <value>", or a single space when the
# value is missing.
as_suffix <- function(x) {
  ifelse(is.na(x), " ", str_c(", ", x))
}

# HTML label for each incident: one "<br/>"-separated line per attribute.
data$popmap <- data %>%
  mutate_at(
    vars(
      attacktype2_txt, attacktype3_txt,
      weaptype2_txt, weaptype3_txt,
      targtype2_txt, targtype3_txt,
      gname2, gname3
    ),
    as_suffix
  ) %>%
  transmute(
    popmap = str_c(
      "Country: ", country_txt, " <br/> ",
      "Date: ", idate, " <br/> ",
      "Attack type: ", attacktype1_txt, attacktype2_txt, attacktype3_txt,
      " <br/> ",
      "Weapon: ", weaptype1_txt, weaptype2_txt, weaptype3_txt, " <br/> ",
      "Target: ", targtype1_txt, targtype2_txt, targtype3_txt, " <br/> ",
      "Group responsible: ", gname, gname2, gname3, " <br/> ",
      # Counts and damage may be missing; show them as the text "NA".
      "Casualty: ", str_replace_na(nkill), " <br/> ",
      "Injured: ", str_replace_na(nwound), " <br/> ",
      "Property damage: ", str_replace_na(propextent_txt)
    )
  ) %>%
  pull(popmap)

Data exploration

# Load the prepared dataset with explicit types for selected columns; all
# other columns are left to readr's type guessing.
data <- read_csv("data/newdat.csv", col_types = cols(
  iyear = col_integer(),
  latitude = col_double(),
  longitude = col_double(),
  attacktype3_txt = col_character(),
  gname3 = col_character(),
  nkill = col_integer(),
  # nwound contains fractional values (rows 109208 and 109209 end in ".5").
  # col_integer() reports them as parsing failures and stores NA, so the
  # column is read as double to keep them.
  nwound = col_double()
))
## Warning: 2 parsing failures.
##    row    col               expected actual              file
## 109208 nwound no trailing characters     .5 'data/newdat.csv'
## 109209 nwound no trailing characters     .5 'data/newdat.csv'

Leaflet

library(leaflet)

# Interactive map of the incidents dated 2000-01-01, with clustered markers
# and an HTML hover label per incident.
data %>%
  filter(idate == "2000-01-01") %>%
  leaflet() %>%
  addTiles() %>%
  addMarkers(
    ~longitude, ~latitude,
    # Take the labels from the filtered rows that are being plotted.
    # `data$popmap` would supply the labels of the full data set, which do
    # not line up with the markers. purrr:: stays explicit because the maps
    # package, attached after tidyverse, also exports map().
    label = ~purrr::map(popmap, shiny::HTML),
    clusterOptions = markerClusterOptions(disableClusteringAtZoom = 12)
  ) %>%
  # Bounds span the whole data set, not just the filtered day; na.rm keeps a
  # single missing coordinate from turning every bound into NA.
  setMaxBounds(
    ~min(data$longitude, na.rm = TRUE), ~min(data$latitude, na.rm = TRUE),
    ~max(data$longitude, na.rm = TRUE), ~max(data$latitude, na.rm = TRUE)
  ) %>%
  setView(0, 0, 2)